home *** CD-ROM | disk | FTP | other *** search
Text File | 1988-12-06 | 48.0 KB | 1,708 lines |
- Path: xanth!nic.MR.NET!hal!cwjcc!ukma!mailrus!ulowell!page
- From: page@swan.ulowell.edu (Bob Page)
- Newsgroups: comp.sources.amiga
- Subject: v02i086: regexp - regular-expression routines, Part02/02
- Message-ID: <10485@swan.ulowell.edu>
- Date: 5 Dec 88 22:06:40 GMT
- Organization: University of Lowell, Computer Science Dept.
- Lines: 1697
- Approved: page@swan.ulowell.edu
-
- Submitted-by: grwalter@watcgl.waterloo.edu
- Posting-number: Volume 2, Issue 86
- Archive-name: unix/regexp.2
-
- # This is a shell archive.
- # Remove everything above and including the cut line.
- # Then run the rest of the file through sh.
- #----cut here-----cut here-----cut here-----cut here----#
- #!/bin/sh
- # shar: Shell Archiver
- # Run the following text with /bin/sh to create:
- # regerror.c
- # regexp.3
- # regexp.c
- # regexp.h
- # regexp.lib.uue
- # regmagic.h
- # regsub.c
- # This archive created: Mon Nov 28 19:50:58 1988
- cat << \SHAR_EOF > regerror.c
- #include <stdio.h>
-
- void
- regerror(s)
- char *s;
- {
- #ifdef ERRAVAIL
- error("regexp: %s", s);
- #else
- fprintf(stderr, "regexp(3): %s", s);
- exit(1);
- #endif
- /* NOTREACHED */
- }
- SHAR_EOF
- cat << \SHAR_EOF > regexp.3
- .TH REGEXP 3 local
- .DA 2 April 1986
- .SH NAME
- regcomp, regexec, regsub, regerror \- regular expression handler
- .SH SYNOPSIS
- .ft B
- .nf
- #include <regexp.h>
-
- regexp *regcomp(exp)
- char *exp;
-
- int regexec(prog, string)
- regexp *prog;
- char *string;
-
- regsub(prog, source, dest)
- regexp *prog;
- char *source;
- char *dest;
-
- regerror(msg)
- char *msg;
- .SH DESCRIPTION
- These functions implement
- .IR egrep (1)-style
- regular expressions and supporting facilities.
- .PP
- .I Regcomp
- compiles a regular expression into a structure of type
- .IR regexp ,
- and returns a pointer to it.
- The space has been allocated using
- .IR malloc (3)
- and may be released by
- .IR free .
- .PP
- .I Regexec
- matches a NUL-terminated \fIstring\fR against the compiled regular expression
- in \fIprog\fR.
- It returns 1 for success and 0 for failure, and adjusts the contents of
- \fIprog\fR's \fIstartp\fR and \fIendp\fR (see below) accordingly.
- .PP
- The members of a
- .I regexp
- structure include at least the following (not necessarily in order):
- .PP
- .RS
- char *startp[NSUBEXP];
- .br
- char *endp[NSUBEXP];
- .RE
- .PP
- where
- .I NSUBEXP
- is defined (as 10) in the header file.
- Once a successful \fIregexec\fR has been done using the \fIregexp\fR,
- each \fIstartp\fR-\fIendp\fR pair describes one substring
- within the \fIstring\fR,
- with the \fIstartp\fR pointing to the first character of the substring and
- the \fIendp\fR pointing to the first character following the substring.
- The 0th substring is the substring of \fIstring\fR that matched the whole
- regular expression.
- The others are those substrings that matched parenthesized expressions
- within the regular expression, with parenthesized expressions numbered
- in left-to-right order of their opening parentheses.
- .PP
- .I Regsub
- copies \fIsource\fR to \fIdest\fR, making substitutions according to the
- most recent \fIregexec\fR performed using \fIprog\fR.
- Each instance of `&' in \fIsource\fR is replaced by the substring
- indicated by \fIstartp\fR[\fI0\fR] and
- \fIendp\fR[\fI0\fR].
- Each instance of `\e\fIn\fR', where \fIn\fR is a digit, is replaced by
- the substring indicated by
- \fIstartp\fR[\fIn\fR] and
- \fIendp\fR[\fIn\fR].
- To get a literal `&' or `\e\fIn\fR' into \fIdest\fR, prefix it with `\e';
- to get a literal `\e' preceding `&' or `\e\fIn\fR', prefix it with
- another `\e'.
- .PP
- .I Regerror
- is called whenever an error is detected in \fIregcomp\fR, \fIregexec\fR,
- or \fIregsub\fR.
- The default \fIregerror\fR writes the string \fImsg\fR,
- with a suitable indicator of origin,
- on the standard
- error output
- and invokes \fIexit\fR(2).
- .I Regerror
- can be replaced by the user if other actions are desirable.
- .SH "REGULAR EXPRESSION SYNTAX"
- A regular expression is zero or more \fIbranches\fR, separated by `|'.
- It matches anything that matches one of the branches.
- .PP
- A branch is zero or more \fIpieces\fR, concatenated.
- It matches a match for the first, followed by a match for the second, etc.
- .PP
- A piece is an \fIatom\fR possibly followed by `*', `+', or `?'.
- An atom followed by `*' matches a sequence of 0 or more matches of the atom.
- An atom followed by `+' matches a sequence of 1 or more matches of the atom.
- An atom followed by `?' matches a match of the atom, or the null string.
- .PP
- An atom is a regular expression in parentheses (matching a match for the
- regular expression), a \fIrange\fR (see below), `.'
- (matching any single character), `^' (matching the null string at the
- beginning of the input string), `$' (matching the null string at the
- end of the input string), a `\e' followed by a single character (matching
- that character), or a single character with no other significance
- (matching that character).
- .PP
- A \fIrange\fR is a sequence of characters enclosed in `[]'.
- It normally matches any single character from the sequence.
- If the sequence begins with `^',
- it matches any single character \fInot\fR from the rest of the sequence.
- If two characters in the sequence are separated by `\-', this is shorthand
- for the full list of ASCII characters between them
- (e.g. `[0-9]' matches any decimal digit).
- To include a literal `]' in the sequence, make it the first character
- (following a possible `^').
- To include a literal `\-', make it the first or last character.
- .SH AMBIGUITY
- If a regular expression could match two different parts of the input string,
- it will match the one which begins earliest.
- If both begin in the same place but match different lengths, or match
- the same length in different ways, life gets messier, as follows.
- .PP
- In general, the possibilities in a list of branches are considered in
- left-to-right order, the possibilities for `*', `+', and `?' are
- considered longest-first, nested constructs are considered from the
- outermost in, and concatenated constructs are considered leftmost-first.
- The match that will be chosen is the one that uses the earliest
- possibility in the first choice that has to be made.
- If there is more than one choice, the next will be made in the same manner
- (earliest possibility) subject to the decision on the first choice.
- And so forth.
- .PP
- For example, `(ab|a)b*c' could match `abc' in one of two ways.
- The first choice is between `ab' and `a'; since `ab' is earlier, and does
- lead to a successful overall match, it is chosen.
- Since the `b' is already spoken for,
- the `b*' must match its last possibility\(emthe empty string\(emsince
- it must respect the earlier choice.
- .PP
- In the particular case where no `|'s are present and there is only one
- `*', `+', or `?', the net effect is that the longest possible
- match will be chosen.
- So `ab*', presented with `xabbbby', will match `abbbb'.
- Note that if `ab*' is tried against `xabyabbbz', it
- will match `ab' just after `x', due to the begins-earliest rule.
- (In effect, the decision on where to start the match is the first choice
- to be made, hence subsequent choices must respect it even if this leads them
- to less-preferred alternatives.)
- .SH SEE ALSO
- egrep(1), expr(1)
- .SH DIAGNOSTICS
- \fIRegcomp\fR returns NULL for a failure
- (\fIregerror\fR permitting),
- where failures are syntax errors, exceeding implementation limits,
- or applying `+' or `*' to a possibly-null operand.
- .SH HISTORY
- Both code and manual page were
- written at U of T.
- They are intended to be compatible with the Bell V8 \fIregexp\fR(3),
- but are not derived from Bell code.
- .SH BUGS
- Empty branches and empty regular expressions are not portable to V8.
- .PP
- The restriction against
- applying `*' or `+' to a possibly-null operand is an artifact of the
- simplistic implementation.
- .PP
- Does not support \fIegrep\fR's newline-separated branches;
- neither does the V8 \fIregexp\fR(3), though.
- .PP
- Due to emphasis on
- compactness and simplicity,
- it's not strikingly fast.
- It does give special attention to handling simple cases quickly.
- SHAR_EOF
- cat << \SHAR_EOF > regexp.c
- /*
- * regcomp and regexec -- regsub and regerror are elsewhere @(#)regexp.c 1.3
- * of 18 April 87
- *
- * Copyright (c) 1986 by University of Toronto. Written by Henry Spencer. Not
- * derived from licensed software.
- *
- * Permission is granted to anyone to use this software for any purpose on any
- * computer system, and to redistribute it freely, subject to the following
- * restrictions:
- *
- * 1. The author is not responsible for the consequences of use of this
- * software, no matter how awful, even if they arise from defects in it.
- *
- * 2. The origin of this software must not be misrepresented, either by explicit
- * claim or by omission.
- *
- * 3. Altered versions must be plainly marked as such, and must not be
- * misrepresented as being the original software.
- *
- * Beware that some of this code is subtly aware of the way operator precedence
- * is structured in regular expressions. Serious changes in
- * regular-expression syntax might require a total rethink.
- */
- #include <stdio.h>
- #ifdef AMIGA
- #undef min
- #include "regexp.h"
- #else
- #include <regexp.h>
- #endif
- #include "regmagic.h"
-
- /*
- * The "internal use only" fields in regexp.h are present to pass info from
- * compile to execute that permits the execute phase to run lots faster on
- * simple cases. They are:
- *
- * regstart char that must begin a match; '\0' if none obvious reganch
- * is the match anchored (at beginning-of-line only)? regmust string
- * (pointer into program) that match must include, or NULL regmlen
- * length of regmust string
- *
- * Regstart and reganch permit very fast decisions on suitable starting points
- * for a match, cutting down the work a lot. Regmust permits fast rejection
- * of lines that cannot possibly match. The regmust tests are costly enough
- * that regcomp() supplies a regmust only if the r.e. contains something
- * potentially expensive (at present, the only such thing detected is * or +
- * at the start of the r.e., which can involve a lot of backup). Regmlen is
- * supplied because the test in regexec() needs it and regcomp() is computing
- * it anyway.
- */
-
- /*
- * Structure for regexp "program". This is essentially a linear encoding of
- * a nondeterministic finite-state machine (aka syntax charts or "railroad
- * normal form" in parsing technology). Each node is an opcode plus a "next"
- * pointer, possibly plus an operand. "Next" pointers of all nodes except
- * BRANCH implement concatenation; a "next" pointer with a BRANCH on both
- * ends of it is connecting two alternatives. (Here we have one of the
- * subtle syntax dependencies: an individual BRANCH (as opposed to a
- * collection of them) is never concatenated with anything because of
- * operator precedence.) The operand of some types of node is a literal
- * string; for others, it is a node leading into a sub-FSM. In particular,
- * the operand of a BRANCH node is the first node of the branch. (NB this is
- * *not* a tree structure: the tail of the branch connects to the thing
- * following the set of BRANCHes.) The opcodes are:
- */
-
- /* definition number opnd? meaning */
- #define END 0 /* no End of program. */
- #define BOL 1 /* no Match "" at beginning of line. */
- #define EOL 2 /* no Match "" at end of line. */
- #define ANY 3 /* no Match any one character. */
- #define ANYOF 4 /* str Match any character in this string. */
- #define ANYBUT 5 /* str Match any character not in this
- * string. */
- #define BRANCH 6 /* node Match this alternative, or the
- * next... */
- #define BACK 7 /* no Match "", "next" ptr points backward. */
- #define EXACTLY 8 /* str Match this string. */
- #define NOTHING 9 /* no Match empty string. */
- #define STAR 10 /* node Match this (simple) thing 0 or more
- * times. */
- #define PLUS 11 /* node Match this (simple) thing 1 or more
- * times. */
- #define OPEN 20 /* no Mark this point in input as start of
- * #n. */
- /* OPEN+1 is number 1, etc. */
- #define CLOSE 30 /* no Analogous to OPEN. */
-
- /*
- * Opcode notes:
- *
- * BRANCH The set of branches constituting a single choice are hooked together
- * with their "next" pointers, since precedence prevents anything being
- * concatenated to any individual branch. The "next" pointer of the last
- * BRANCH in a choice points to the thing following the whole choice. This
- * is also where the final "next" pointer of each individual branch points;
- * each branch starts with the operand node of a BRANCH node.
- *
- * BACK Normal "next" pointers all implicitly point forward; BACK exists to
- * make loop structures possible.
- *
- * STAR,PLUS '?', and complex '*' and '+', are implemented as circular
- * BRANCH structures using BACK. Simple cases (one character per match) are
- * implemented with STAR and PLUS for speed and to minimize recursive
- * plunges.
- *
- * OPEN,CLOSE ...are numbered at compile time.
- */
-
- /*
- * A node is one char of opcode followed by two chars of "next" pointer.
- * "Next" pointers are stored as two 8-bit pieces, high order first. The
- * value is a positive offset from the opcode of the node containing it. An
- * operand, if any, simply follows the node. (Note that much of the code
- * generation knows about this implicit relationship.)
- *
- * Using two bytes for the "next" pointer is vast overkill for most things, but
- * allows patterns to get big without disasters.
- */
- #define OP(p) (*(p))
- #define NEXT(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377))
- #define OPERAND(p) ((p) + 3)
-
- /*
- * See regmagic.h for one further detail of program structure.
- */
-
-
- /*
- * Utility definitions.
- */
- #ifndef CHARBITS
- #define UCHARAT(p) ((int)*(unsigned char *)(p))
- #else
- #define UCHARAT(p) ((int)*(p)&CHARBITS)
- #endif
-
- #define FAIL(m) { regerror(m); return(NULL); }
- #define ISMULT(c) ((c) == '*' || (c) == '+' || (c) == '?')
- #define META "^$.[()|?+*\\"
-
- /*
- * Flags to be passed up and down.
- */
- #define HASWIDTH 01 /* Known never to match null string. */
- #define SIMPLE 02 /* Simple enough to be STAR/PLUS operand. */
- #define SPSTART 04 /* Starts with * or +. */
- #define WORST 0 /* Worst case. */
-
- /*
- * Global work variables for regcomp().
- */
- static char *regparse; /* Input-scan pointer. */
- static int regnpar; /* () count. */
- static char regdummy;
- static char *regcode; /* Code-emit pointer; ®dummy = don't. */
- static long regsize; /* Code size. */
-
- /*
- * Forward declarations for regcomp()'s friends.
- */
- #ifndef STATIC
- #define STATIC static
- #endif
- STATIC char *reg();
- STATIC char *regbranch();
- STATIC char *regpiece();
- STATIC char *regatom();
- STATIC char *regnode();
- STATIC char *regnext();
- STATIC void regc();
- STATIC void reginsert();
- STATIC void regtail();
- STATIC void regoptail();
- #ifdef STRCSPN
- int strcspn();
- #endif
-
- /*
- * - regcomp - compile a regular expression into internal code
- *
- * We can't allocate space until we know how big the compiled form will be, but
- * we can't compile it (and thus know how big it is) until we've got a place
- * to put the code. So we cheat: we compile it twice, once with code
- * generation turned off and size counting turned on, and once "for real".
- * This also means that we don't allocate space until we are sure that the
- * thing really will compile successfully, and we never have to move the code
- * and thus invalidate pointers into it. (Note that it has to be in one
- * piece because free() must be able to free it all.)
- *
- * Beware that the optimization-preparation code in here knows about some of the
- * structure of the compiled regexp.
- */
- regexp *
- regcomp(exp)
- char *exp;
- {
- register regexp *r;
- register char *scan;
- register char *longest;
- register int len;
- int flags;
- extern char *malloc();
-
- if (exp == NULL)
- FAIL("NULL argument");
-
- /* First pass: determine size, legality. */
- regparse = exp;
- regnpar = 1;
- regsize = 0L;
- regcode = ®dummy;
- regc(MAGIC);
- if (reg(0, &flags) == NULL)
- return (NULL);
-
- /* Small enough for pointer-storage convention? */
- if (regsize >= 32767L) /* Probably could be 65535L. */
- FAIL("regexp too big");
-
- /* Allocate space. */
- r = (regexp *) malloc(sizeof(regexp) + (unsigned) regsize);
- if (r == NULL)
- FAIL("out of space");
-
- /* Second pass: emit code. */
- regparse = exp;
- regnpar = 1;
- regcode = r->program;
- regc(MAGIC);
- if (reg(0, &flags) == NULL)
- return (NULL);
-
- /* Dig out information for optimizations. */
- r->regstart = '\0'; /* Worst-case defaults. */
- r->reganch = 0;
- r->regmust = NULL;
- r->regmlen = 0;
- scan = r->program + 1; /* First BRANCH. */
- if (OP(regnext(scan)) == END) { /* Only one top-level choice. */
- scan = OPERAND(scan);
-
- /* Starting-point info. */
- if (OP(scan) == EXACTLY)
- r->regstart = *OPERAND(scan);
- else if (OP(scan) == BOL)
- r->reganch++;
-
- /*
- * If there's something expensive in the r.e., find the longest
- * literal string that must appear and make it the regmust. Resolve
- * ties in favor of later strings, since the regstart check works
- * with the beginning of the r.e. and avoiding duplication
- * strengthens checking. Not a strong reason, but sufficient in the
- * absence of others.
- */
- if (flags & SPSTART) {
- longest = NULL;
- len = 0;
- for (; scan != NULL; scan = regnext(scan))
- if (OP(scan) == EXACTLY && strlen(OPERAND(scan)) >= len) {
- longest = OPERAND(scan);
- len = strlen(OPERAND(scan));
- }
- r->regmust = longest;
- r->regmlen = len;
- }
- }
- return (r);
- }
-
- /*
- * - reg - regular expression, i.e. main body or parenthesized thing
- *
- * Caller must absorb opening parenthesis.
- *
- * Combining parenthesis handling with the base level of regular expression is a
- * trifle forced, but the need to tie the tails of the branches to what
- * follows makes it hard to avoid.
- */
- static char *
- reg(paren, flagp)
- int paren; /* Parenthesized? */
- int *flagp;
- {
- register char *ret;
- register char *br;
- register char *ender;
- register int parno;
- int flags;
-
- *flagp = HASWIDTH; /* Tentatively. */
-
- /* Make an OPEN node, if parenthesized. */
- if (paren) {
- if (regnpar >= NSUBEXP)
- FAIL("too many ()");
- parno = regnpar;
- regnpar++;
- ret = regnode(OPEN + parno);
- } else
- ret = NULL;
-
- /* Pick up the branches, linking them together. */
- br = regbranch(&flags);
- if (br == NULL)
- return (NULL);
- if (ret != NULL)
- regtail(ret, br); /* OPEN -> first. */
- else
- ret = br;
- if (!(flags & HASWIDTH))
- *flagp &= ~HASWIDTH;
- *flagp |= flags & SPSTART;
- while (*regparse == '|') {
- regparse++;
- br = regbranch(&flags);
- if (br == NULL)
- return (NULL);
- regtail(ret, br); /* BRANCH -> BRANCH. */
- if (!(flags & HASWIDTH))
- *flagp &= ~HASWIDTH;
- *flagp |= flags & SPSTART;
- }
-
- /* Make a closing node, and hook it on the end. */
- ender = regnode((paren) ? CLOSE + parno : END);
- regtail(ret, ender);
-
- /* Hook the tails of the branches to the closing node. */
- for (br = ret; br != NULL; br = regnext(br))
- regoptail(br, ender);
-
- /* Check for proper termination. */
- if (paren && *regparse++ != ')') {
- FAIL("unmatched ()");
- } else if (!paren && *regparse != '\0') {
- if (*regparse == ')') {
- FAIL("unmatched ()");
- } else
- FAIL("junk on end");/* "Can't happen". */
- /* NOTREACHED */
- }
- return (ret);
- }
-
- /*
- * - regbranch - one alternative of an | operator
- *
- * Implements the concatenation operator.
- */
- static char *
- regbranch(flagp)
- int *flagp;
- {
- register char *ret;
- register char *chain;
- register char *latest;
- int flags;
-
- *flagp = WORST; /* Tentatively. */
-
- ret = regnode(BRANCH);
- chain = NULL;
- while (*regparse != '\0' && *regparse != '|' && *regparse != ')') {
- latest = regpiece(&flags);
- if (latest == NULL)
- return (NULL);
- *flagp |= flags & HASWIDTH;
- if (chain == NULL) /* First piece. */
- *flagp |= flags & SPSTART;
- else
- regtail(chain, latest);
- chain = latest;
- }
- if (chain == NULL) /* Loop ran zero times. */
- (void) regnode(NOTHING);
-
- return (ret);
- }
-
- /*
- * - regpiece - something followed by possible [*+?]
- *
- * Note that the branching code sequences used for ? and the general cases of *
- * and + are somewhat optimized: they use the same NOTHING node as both the
- * endmarker for their branch list and the body of the last branch. It might
- * seem that this node could be dispensed with entirely, but the endmarker
- * role is not redundant.
- */
- static char *
- regpiece(flagp)
- int *flagp;
- {
- register char *ret;
- register char op;
- register char *next;
- int flags;
-
- ret = regatom(&flags);
- if (ret == NULL)
- return (NULL);
-
- op = *regparse;
- if (!ISMULT(op)) {
- *flagp = flags;
- return (ret);
- }
- if (!(flags & HASWIDTH) && op != '?')
- FAIL("*+ operand could be empty");
- *flagp = (op != '+') ? (WORST | SPSTART) : (WORST | HASWIDTH);
-
- if (op == '*' && (flags & SIMPLE))
- reginsert(STAR, ret);
- else if (op == '*') {
- /* Emit x* as (x&|), where & means "self". */
- reginsert(BRANCH, ret); /* Either x */
- regoptail(ret, regnode(BACK)); /* and loop */
- regoptail(ret, ret); /* back */
- regtail(ret, regnode(BRANCH)); /* or */
- regtail(ret, regnode(NOTHING)); /* null. */
- } else if (op == '+' && (flags & SIMPLE))
- reginsert(PLUS, ret);
- else if (op == '+') {
- /* Emit x+ as x(&|), where & means "self". */
- next = regnode(BRANCH); /* Either */
- regtail(ret, next);
- regtail(regnode(BACK), ret); /* loop back */
- regtail(next, regnode(BRANCH)); /* or */
- regtail(ret, regnode(NOTHING)); /* null. */
- } else if (op == '?') {
- /* Emit x? as (x|) */
- reginsert(BRANCH, ret); /* Either x */
- regtail(ret, regnode(BRANCH)); /* or */
- next = regnode(NOTHING);/* null. */
- regtail(ret, next);
- regoptail(ret, next);
- }
- regparse++;
- if (ISMULT(*regparse))
- FAIL("nested *?+");
-
- return (ret);
- }
-
- /*
- * - regatom - the lowest level
- *
- * Optimization: gobbles an entire sequence of ordinary characters so that it
- * can turn them into a single node, which is smaller to store and faster to
- * run. Backslashed characters are exceptions, each becoming a separate
- * node; the code is simpler that way and it's not worth fixing.
- */
- static char *
- regatom(flagp)
- int *flagp;
- {
- register char *ret;
- int flags;
-
- *flagp = WORST; /* Tentatively. */
-
- switch (*regparse++) {
- case '^':
- ret = regnode(BOL);
- break;
- case '$':
- ret = regnode(EOL);
- break;
- case '.':
- ret = regnode(ANY);
- *flagp |= HASWIDTH | SIMPLE;
- break;
- case '[':{
- register int class;
- register int classend;
-
- if (*regparse == '^') { /* Complement of range. */
- ret = regnode(ANYBUT);
- regparse++;
- } else
- ret = regnode(ANYOF);
- if (*regparse == ']' || *regparse == '-')
- regc(*regparse++);
- while (*regparse != '\0' && *regparse != ']') {
- if (*regparse == '-') {
- regparse++;
- if (*regparse == ']' || *regparse == '\0')
- regc('-');
- else {
- class = UCHARAT(regparse - 2) + 1;
- classend = UCHARAT(regparse);
- if (class > classend + 1)
- FAIL("invalid [] range");
- for (; class <= classend; class++)
- regc(class);
- regparse++;
- }
- } else
- regc(*regparse++);
- }
- regc('\0');
- if (*regparse != ']')
- FAIL("unmatched []");
- regparse++;
- *flagp |= HASWIDTH | SIMPLE;
- }
- break;
- case '(':
- ret = reg(1, &flags);
- if (ret == NULL)
- return (NULL);
- *flagp |= flags & (HASWIDTH | SPSTART);
- break;
- case '\0':
- case '|':
- case ')':
- FAIL("internal urp"); /* Supposed to be caught earlier. */
- break;
- case '?':
- case '+':
- case '*':
- FAIL("?+* follows nothing");
- break;
- case '\\':
- if (*regparse == '\0')
- FAIL("trailing \\");
- ret = regnode(EXACTLY);
- regc(*regparse++);
- regc('\0');
- *flagp |= HASWIDTH | SIMPLE;
- break;
- default:{
- register int len;
- register char ender;
-
- regparse--;
- len = strcspn(regparse, META);
- if (len <= 0)
- FAIL("internal disaster");
- ender = *(regparse + len);
- if (len > 1 && ISMULT(ender))
- len--; /* Back off clear of ?+* operand. */
- *flagp |= HASWIDTH;
- if (len == 1)
- *flagp |= SIMPLE;
- ret = regnode(EXACTLY);
- while (len > 0) {
- regc(*regparse++);
- len--;
- }
- regc('\0');
- }
- break;
- }
-
- return (ret);
- }
-
- /*
- * - regnode - emit a node
- */
- static char * /* Location. */
- regnode(op)
- char op;
- {
- register char *ret;
- register char *ptr;
-
- ret = regcode;
- if (ret == ®dummy) {
- regsize += 3;
- return (ret);
- }
- ptr = ret;
- *ptr++ = op;
- *ptr++ = '\0'; /* Null "next" pointer. */
- *ptr++ = '\0';
- regcode = ptr;
-
- return (ret);
- }
-
- /*
- * - regc - emit (if appropriate) a byte of code
- */
- static void
- regc(b)
- char b;
- {
- if (regcode != ®dummy)
- *regcode++ = b;
- else
- regsize++;
- }
-
- /*
- * - reginsert - insert an operator in front of already-emitted operand
- *
- * Means relocating the operand.
- */
- static void
- reginsert(op, opnd)
- char op;
- char *opnd;
- {
- register char *src;
- register char *dst;
- register char *place;
-
- if (regcode == ®dummy) {
- regsize += 3;
- return;
- }
- src = regcode;
- regcode += 3;
- dst = regcode;
- while (src > opnd)
- *--dst = *--src;
-
- place = opnd; /* Op node, where operand used to be. */
- *place++ = op;
- *place++ = '\0';
- *place++ = '\0';
- }
-
- /*
- * - regtail - set the next-pointer at the end of a node chain
- */
- static void
- regtail(p, val)
- char *p;
- char *val;
- {
- register char *scan;
- register char *temp;
- register int offset;
-
- if (p == ®dummy)
- return;
-
- /* Find last node. */
- scan = p;
- for (;;) {
- temp = regnext(scan);
- if (temp == NULL)
- break;
- scan = temp;
- }
-
- if (OP(scan) == BACK)
- offset = scan - val;
- else
- offset = val - scan;
- *(scan + 1) = (offset >> 8) & 0377;
- *(scan + 2) = offset & 0377;
- }
-
- /*
- * - regoptail - regtail on operand of first argument; nop if operandless
- */
- static void
- regoptail(p, val)
- char *p;
- char *val;
- {
- /* "Operandless" and "op != BRANCH" are synonymous in practice. */
- if (p == NULL || p == ®dummy || OP(p) != BRANCH)
- return;
- regtail(OPERAND(p), val);
- }
-
- /*
- * regexec and friends
- */
-
- /*
- * Global work variables for regexec().
- */
- static char *reginput; /* String-input pointer. */
- static char *regbol; /* Beginning of input, for ^ check. */
- static char **regstartp; /* Pointer to startp array. */
- static char **regendp; /* Ditto for endp. */
-
- /*
- * Forwards.
- */
- STATIC int regtry();
- STATIC int regmatch();
- STATIC int regrepeat();
-
- #ifdef DEBUG
- int regnarrate = 0;
- void regdump();
- STATIC char *regprop();
- #endif
-
- /*
- * - regexec - match a regexp against a string
- */
- int
- regexec(prog, string)
- register regexp *prog;
- register char *string;
- {
- register char *s;
- extern char *strchr();
-
- /* Be paranoid... */
- if (prog == NULL || string == NULL) {
- regerror("NULL parameter");
- return (0);
- }
- /* Check validity of program. */
- if (UCHARAT(prog->program) != MAGIC) {
- regerror("corrupted program");
- return (0);
- }
- /* If there is a "must appear" string, look for it. */
- if (prog->regmust != NULL) {
- s = string;
- while ((s = strchr(s, prog->regmust[0])) != NULL) {
- if (strncmp(s, prog->regmust, prog->regmlen) == 0)
- break; /* Found it. */
- s++;
- }
- if (s == NULL) /* Not present. */
- return (0);
- }
- /* Mark beginning of line for ^ . */
- regbol = string;
-
- /* Simplest case: anchored match need be tried only once. */
- if (prog->reganch)
- return (regtry(prog, string));
-
- /* Messy cases: unanchored match. */
- s = string;
- if (prog->regstart != '\0')
- /* We know what char it must start with. */
- while ((s = strchr(s, prog->regstart)) != NULL) {
- if (regtry(prog, s))
- return (1);
- s++;
- }
- else
- /* We don't -- general case. */
- do {
- if (regtry(prog, s))
- return (1);
- } while (*s++ != '\0');
-
- /* Failure. */
- return (0);
- }
-
- /*
- * - regtry - try match at specific point
- */
- static int /* 0 failure, 1 success */
- regtry(prog, string)
- regexp *prog;
- char *string;
- {
- register int i;
- register char **sp;
- register char **ep;
-
- reginput = string;
- regstartp = prog->startp;
- regendp = prog->endp;
-
- sp = prog->startp;
- ep = prog->endp;
- for (i = NSUBEXP; i > 0; i--) {
- *sp++ = NULL;
- *ep++ = NULL;
- }
- if (regmatch(prog->program + 1)) {
- prog->startp[0] = string;
- prog->endp[0] = reginput;
- return (1);
- } else
- return (0);
- }
-
- /*
- * - regmatch - main matching routine
- *
- * Conceptually the strategy is simple: check to see whether the current node
- * matches, call self recursively to see whether the rest matches, and then
- * act accordingly. In practice we make some effort to avoid recursion, in
- * particular by going through "ordinary" nodes (that don't need to know
- * whether the rest of the match failed) by a loop instead of by recursion.
- */
- static int /* 0 failure, 1 success */
- regmatch(prog)
- char *prog;
- {
- register char *scan; /* Current node. */
- char *next; /* Next node. */
- extern char *strchr();
-
- scan = prog;
- #ifdef DEBUG
- if (scan != NULL && regnarrate)
- fprintf(stderr, "%s(\n", regprop(scan));
- #endif
- while (scan != NULL) {
- #ifdef DEBUG
- if (regnarrate)
- fprintf(stderr, "%s...\n", regprop(scan));
- #endif
- next = regnext(scan);
-
- switch (OP(scan)) {
- case BOL:
- if (reginput != regbol)
- return (0);
- break;
- case EOL:
- if (*reginput != '\0')
- return (0);
- break;
- case ANY:
- if (*reginput == '\0')
- return (0);
- reginput++;
- break;
- case EXACTLY:{
- register int len;
- register char *opnd;
-
- opnd = OPERAND(scan);
- /* Inline the first character, for speed. */
- if (*opnd != *reginput)
- return (0);
- len = strlen(opnd);
- if (len > 1 && strncmp(opnd, reginput, len) != 0)
- return (0);
- reginput += len;
- }
- break;
- case ANYOF:
- if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) == NULL)
- return (0);
- reginput++;
- break;
- case ANYBUT:
- if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) != NULL)
- return (0);
- reginput++;
- break;
- case NOTHING:
- break;
- case BACK:
- break;
- case OPEN + 1:
- case OPEN + 2:
- case OPEN + 3:
- case OPEN + 4:
- case OPEN + 5:
- case OPEN + 6:
- case OPEN + 7:
- case OPEN + 8:
- case OPEN + 9:{
- register int no;
- register char *save;
-
- no = OP(scan) - OPEN;
- save = reginput;
-
- if (regmatch(next)) {
- /*
- * Don't set startp if some later invocation of the same
- * parentheses already has.
- */
- if (regstartp[no] == NULL)
- regstartp[no] = save;
- return (1);
- } else
- return (0);
- }
- break;
- case CLOSE + 1:
- case CLOSE + 2:
- case CLOSE + 3:
- case CLOSE + 4:
- case CLOSE + 5:
- case CLOSE + 6:
- case CLOSE + 7:
- case CLOSE + 8:
- case CLOSE + 9:{
- register int no;
- register char *save;
-
- no = OP(scan) - CLOSE;
- save = reginput;
-
- if (regmatch(next)) {
- /*
- * Don't set endp if some later invocation of the same
- * parentheses already has.
- */
- if (regendp[no] == NULL)
- regendp[no] = save;
- return (1);
- } else
- return (0);
- }
- break;
- case BRANCH:{
- register char *save;
-
- if (OP(next) != BRANCH) /* No choice. */
- next = OPERAND(scan); /* Avoid recursion. */
- else {
- do {
- save = reginput;
- if (regmatch(OPERAND(scan)))
- return (1);
- reginput = save;
- scan = regnext(scan);
- } while (scan != NULL && OP(scan) == BRANCH);
- return (0);
- /* NOTREACHED */
- }
- }
- break;
- case STAR:
- case PLUS:{
- register char nextch;
- register int no;
- register char *save;
- register int min;
-
- /*
- * Lookahead to avoid useless match attempts when we know
- * what character comes next.
- */
- nextch = '\0';
- if (OP(next) == EXACTLY)
- nextch = *OPERAND(next);
- min = (OP(scan) == STAR) ? 0 : 1;
- save = reginput;
- no = regrepeat(OPERAND(scan));
- while (no >= min) {
- /* If it could work, try it. */
- if (nextch == '\0' || *reginput == nextch)
- if (regmatch(next))
- return (1);
- /* Couldn't or didn't -- back up. */
- no--;
- reginput = save + no;
- }
- return (0);
- }
- break;
- case END:
- return (1); /* Success! */
- break;
- default:
- regerror("memory corruption");
- return (0);
- break;
- }
-
- scan = next;
- }
-
- /*
- * We get here only if there's trouble -- normally "case END" is the
- * terminating point.
- */
- regerror("corrupted pointers");
- return (0);
- }
-
- /*
- * - regrepeat - repeatedly match something simple, report how many
- */
- static int
- regrepeat(p)
- char *p;
- {
- register int count = 0;
- register char *scan;
- register char *opnd;
-
- scan = reginput;
- opnd = OPERAND(p);
- switch (OP(p)) {
- case ANY:
- count = strlen(scan);
- scan += count;
- break;
- case EXACTLY:
- while (*opnd == *scan) {
- count++;
- scan++;
- }
- break;
- case ANYOF:
- while (*scan != '\0' && strchr(opnd, *scan) != NULL) {
- count++;
- scan++;
- }
- break;
- case ANYBUT:
- while (*scan != '\0' && strchr(opnd, *scan) == NULL) {
- count++;
- scan++;
- }
- break;
- default: /* Oh dear. Called inappropriately. */
- regerror("internal foulup");
- count = 0; /* Best compromise. */
- break;
- }
- reginput = scan;
-
- return (count);
- }
-
- /*
- * - regnext - dig the "next" pointer out of a node
- */
- static char *
- regnext(p)
- register char *p;
- {
- register int offset;
-
- if (p == ®dummy)
- return (NULL);
-
- offset = NEXT(p);
- if (offset == 0)
- return (NULL);
-
- if (OP(p) == BACK)
- return (p - offset);
- else
- return (p + offset);
- }
-
- #ifdef DEBUG
-
- STATIC char *regprop();
-
- /*
- * - regdump - dump a regexp onto stdout in vaguely comprehensible form
- */
- void
- regdump(r)
- regexp *r;
- {
- register char *s;
- register char op = EXACTLY; /* Arbitrary non-END op. */
- register char *next;
- extern char *strchr();
-
-
- s = r->program + 1;
- while (op != END) { /* While that wasn't END last time... */
- op = OP(s);
- printf("%2d%s", s - r->program, regprop(s)); /* Where, what. */
- next = regnext(s);
- if (next == NULL) /* Next ptr. */
- printf("(0)");
- else
- printf("(%d)", (s - r->program) + (next - s));
- s += 3;
- if (op == ANYOF || op == ANYBUT || op == EXACTLY) {
- /* Literal string, where present. */
- while (*s != '\0') {
- putchar(*s);
- s++;
- }
- s++;
- }
- putchar('\n');
- }
-
- /* Header fields of interest. */
- if (r->regstart != '\0')
- printf("start `%c' ", r->regstart);
- if (r->reganch)
- printf("anchored ");
- if (r->regmust != NULL)
- printf("must have \"%s\"", r->regmust);
- printf("\n");
- }
-
- /*
- * - regprop - printable representation of opcode
- */
- static char *
- regprop(op)
- char *op;
- {
- register char *p;
- static char buf[50];
-
- (void) strcpy(buf, ":");
-
- switch (OP(op)) {
- case BOL:
- p = "BOL";
- break;
- case EOL:
- p = "EOL";
- break;
- case ANY:
- p = "ANY";
- break;
- case ANYOF:
- p = "ANYOF";
- break;
- case ANYBUT:
- p = "ANYBUT";
- break;
- case BRANCH:
- p = "BRANCH";
- break;
- case EXACTLY:
- p = "EXACTLY";
- break;
- case NOTHING:
- p = "NOTHING";
- break;
- case BACK:
- p = "BACK";
- break;
- case END:
- p = "END";
- break;
- case OPEN + 1:
- case OPEN + 2:
- case OPEN + 3:
- case OPEN + 4:
- case OPEN + 5:
- case OPEN + 6:
- case OPEN + 7:
- case OPEN + 8:
- case OPEN + 9:
- sprintf(buf + strlen(buf), "OPEN%d", OP(op) - OPEN);
- p = NULL;
- break;
- case CLOSE + 1:
- case CLOSE + 2:
- case CLOSE + 3:
- case CLOSE + 4:
- case CLOSE + 5:
- case CLOSE + 6:
- case CLOSE + 7:
- case CLOSE + 8:
- case CLOSE + 9:
- sprintf(buf + strlen(buf), "CLOSE%d", OP(op) - CLOSE);
- p = NULL;
- break;
- case STAR:
- p = "STAR";
- break;
- case PLUS:
- p = "PLUS";
- break;
- default:
- regerror("corrupted opcode");
- break;
- }
- if (p != NULL)
- (void) strcat(buf, p);
- return (buf);
- }
- #endif
-
- /*
- * The following is provided for those people who do not have strcspn() in
- * their C libraries. They should get off their butts and do something about
- * it; at least one public-domain implementation of those (highly useful)
- * string routines has been published on Usenet.
- */
- #ifdef STRCSPN
- /*
- * strcspn - find length of initial segment of s1 consisting entirely of
- * characters not from s2
- */
-
- static int
- strcspn(s1, s2)
- char *s1;
- char *s2;
- {
- register char *scan1;
- register char *scan2;
- register int count;
-
- count = 0;
- for (scan1 = s1; *scan1 != '\0'; scan1++) {
- for (scan2 = s2; *scan2 != '\0';) /* ++ moved down. */
- if (*scan1 == *scan2++)
- return (count);
- count++;
- }
- return (count);
- }
- #endif
- SHAR_EOF
- cat << \SHAR_EOF > regexp.h
- /*
- * Definitions etc. for regexp(3) routines.
- *
- * Caveat: this is V8 regexp(3) [actually, a reimplementation thereof],
- * not the System V one.
- */
- #define NSUBEXP 10
- typedef struct regexp {
- char *startp[NSUBEXP];
- char *endp[NSUBEXP];
- char regstart; /* Internal use only. */
- char reganch; /* Internal use only. */
- char *regmust; /* Internal use only. */
- int regmlen; /* Internal use only. */
- char program[1]; /* Unwarranted chumminess with compiler. */
- } regexp;
-
- extern regexp *regcomp();
- extern int regexec();
- extern void regsub();
- extern void regerror();
- SHAR_EOF
- cat << \SHAR_EOF > regexp.lib.uue
- begin 644 regexp.lib
- M #^@ !?$ /I #$Y5 "_[ 90 "\M A(; 2&P 1$ZZ !/
- M[P ,2'@ 4ZZ !83TY=3G4 _@ ! 0 !( #[X8
- M )?7V)A<V4 $ &@P E]?>&-O=F8 0 J& "7U]I
- M;V( ! %H, )?9G!R:6YT9@ $ :@P E]E>&ET
- M 0 "8 #\@ ^H $<F5G97AP*#,I.B E<P _(
- M /I 14Y5_^Y(YP<PO^P &4 !*K0 (9PQ*K0 ,9P9*K0 09@Y(;
- M3KH %A/8 WG!:<@ @;0 ($C ( R! G&<.2&P %$ZZ !83V +PF
- M;0 ,)&T $!X34HM*!V< *8,!P F9@1\ & F# < 7&8>$A,, 0 P;18, 0 Y
- M;A!(@4C!4HL$@0 # L 6 "?/]*AFH># < 7&82$A,, 0!<9P8, 0 F9@0N
- M 5*+%(=2BF"D( 8B!N6!(&T "$JP& !GE$JP&"AGCB &(@;E@2 P&"B0L!@
- M*@ O!2\P& O"DZZ !/[P ,U<5*A6< _VA**O__9@#_8$AL #).N@ 6$]@
- M!$(24HI,WPS@3EU.=0 _@ # 0 /X !& ) /O
- MA@ E]?8F%S90 0 J# "7U]X8V]V9@ ! #H, -?
- M<F5G97)R;W( # ! @ $H H@P E]S=')N8W!Y 0
- M .8 #\@ ^H 23E5,3"!P87)M('1O(')E9W-U8@!D86UA9V5D
- M(')E9V5X<"!F960@=&\@<F5G<W5B !D86UA9V5D(&UA=&-H('-T<FEN9P
- M #\@ ^D 0+3E7_[$CG 3"_[ 90 $JM AF$$AL !.N@ 6$]P
- M & 50I;0 ( !P 2E 1"K .0>P "$AX )PI2 *80 (M%A/2&W_[$*G
- M80 !,%!/2H!F!G 8 !' RL !__P .;1!(; .3KH %A/< !@ $"("P
- M#@: 7"\ 3KH %A/)D"V_ 9A!(; >3KH %A/< !@ #8*6T "
- M< $I0 $($O0_ !:2'@ G"E( IA @Z6$](;?_L0J=A "V4$]*@&8&< !@
- M "B< 70 !0%T 49'()T@ 4B=( %8@2]#\ %LD2"\*80 .QEA/($ 2$$H!
- M9G)6BA(2# $ "&8(%VH P!08 A3 68$4BL 40@M +_[V=0D<@N""M(__2T
- M_ 9S@,$@ (9B8@2B!*5H@O"$ZZ !83["';10@2B!*5H@O""M(__1.N@
- M6$\N "\*80 .6EA/)$!@PB=M__0 4B=' %8@"TS?#(!.74YU3E7_[$CG 3"_
- M[ 90 ' !(&T #"" 2JT "&<X#*P * 1M$$AL "Q.N@ 6$]P &
- M 60N+ $4JP !" '( <&@ !0O &$ !N983R9 8 *7RTAM_^QA %$6$\D
- M0+3\ !F!G 8 !++;\ !G#"\*+PMA >*4$]@ B9*""T /_O9@H@;0 ,
- M"*@ #("W_[ * !"!M R!D"!L ,$ !\9DI2K 2&W_[&$ .A8
- M3R1 M/P &8&< !@ #0+PHO"V$ !S103P@M #_[V8*(&T # BH R M
- M_^P"@ 0@;0 ,@9!@K$JM AG#" '( <&@ !Y@ G +P!A 8B6$\O
- M "\+*T#_]&$ !N903R1+M/P &<8+RW_]"\*80 '1%!/+PIA T"6$\D0B
- M2JT "&<>(&P ! 04JP P "EG#DAL #A.N@ 6$]P & R2JT "&8J(&P
- M $H09R(,$ I9@Y(; !&3KH %A/< !@$$AL %1.N@ 6$]P & "( M,WPR
- M3EU.=4Y5__!(YP PO^P &4 @;0 (0I!(> &80 %=EA/)D"5RB!L !*
- M$&=:$A , 0!\9U(, 0 I9TQ(;?_P86!83RM __1*@&8$< !@2B M__ B *!
- M 2!M B#D+3\ !F#B M__ "@ 2!D& ,+RW_]"\*80 %Z%!/)&W_
- M]&">M/P &8*2'@ "6$ !0!83R +3-\, $Y=3G5.5?_R2.<!,+_L !E
- M2&W_\F$ =983R9 MOP &8&< !@ &^(&P !X0# < *F<<# < *V<6# <
- M/V<0("W_\B)M @B@" +8 !E@@M #_]686# < /V<02&P 8$ZZ !83W
- M8 !> P' "MG!' $8 )P 2!M @@@ P' "IF& @M '_]6<0+PM(> *80 $
- MS%!/8 !' P' "IF5B\+2'@ !F$ !+903TAX =A 0\6$\O "\+80 %=E!/
- M+PLO"V$ !6Q03TAX 9A 0>6$\O "\+80 $YE!/2'@ "6$ ! I83R\ +PMA
- M 324$]@ # # < *V88""T ?_U9Q O"TAX MA 124$]@ "B# < *V94
- M2'@ !F$ \Y83R1 +PHO"V$ !)103TAX =A .X6$\O"R\ 80 $@%!/2'@
- M!F$ Z183R\ +PIA 1L4$](> )80 #D%A/+P O"V$ !%A03V! < /V9
- M+PM(> &80 #XE!/2'@ !F$ VA83R\ +PMA 0P4$](> )80 #5%A/)$ O
- M"B\+80 $&E!/+PHO"V$ !()03U*L @; $A , 0 J9PP, 0 K9P8, 0 _
- M9@Y(; !Z3KH %A/< !@ B +3-\,@$Y=3G5.5?_P2.</$+_L !E (&T
- M"$*0(&P ! 02(!2K <DA=06L D*P>Q (9O1.^Q $ %Q@ 'B "I@ ',
- M "M@ '& #]@ ' "E@ &J 'Q@ &D !@ &> "A@ %H %M@ !. "Y@
- M N "1@ 8 %Y@ "2'@ 6$ H183R9 8 "<DAX )A )T6$\F0&
- M F)(> #80 "9%A/)D @;0 ( ) #8 "2"!L ,$ !>9A)(> %80 "
- M0%A/)D!2K 8 Q(> $80 "+EA/)D @; $A , 0!=9P8, 0 M9A!(@4C!
- M4JP "\!80 "3EA/(&P $H09P CA(0# $ 76< (0, 0 M9F)2K (&P
- M !(0# $ 76<$2@%F#$AX "UA (66$]@QE6(< 0$%* +@!\ "!L <$" &
- M( 92@+Z ;Q!(; "&3KH %A/< !@ &:OH9N#"\'80 !W%A/4H=@\%*L !@
- MA"!L 0$$B 2,!2K +P!A &\6$]@ /]L0J=A &P6$\@; #! 76<0
- M2&P F$ZZ !83W 8 !2%*L @;0 ( ) #8 !-$AM__A(> !80#Y
- M_E!/)D"V_ 9@9P & 1H@+?_X H %(&T "(&08 !!$AL *9.N@
- M6$]P & /9(; "T3KH %A/< !@ #F(&P $H09A!(; #(3KH %A/< !@
- M #.2'@ "&$ ,Y83R9 (&P ! 02(!(P%*L O &$ /I83T*G80 \EA/
- M(&T " "0 V )13K 2&P U"\L !.N@ 4$\J $J%;@Y(; #@3KH
- M %A/< !@;B!L 8,%@ #(4 !;Q0,! J9PP,! K9P8,! _9@)3A2!M
- M @(Z ,,A0 %F!@CH $ TAX AA-%A/)D!*A6\:(&P ! 02(!(
- MP%*L O &$ %Y83U.%8.)"IV$ %)83R +3-\(\$Y=3G5.5?_X2.< ,+_L
- M !E )FP "D'L BQRV8(5JP #B +8!@D2Q2M M2BG %(!2BA2 4HHI
- M2@ *( M,WPP 3EU.=4Y5 "_[ 90 $'L @B+ *L<%G#"!!$*T "U*L
- M I@!%*L Y.74YU3E7_]$CG #"_[ 90 $'L @B+ *L<%F!E:L Y@
- M."9!5JP "B1L JW[0 ,8PA3BE.+%)-@\B!M P0K0 +*TC_]%*(< 0@"M(
- M__12B!" *TC_]%*(3-\, $Y=3G5.5?_T2.<!,+_L !E 0>P ""(M BQ
- MP6=.)D$O"V$ !A983R1 M/P &<$)DI@[ P3 =F"B +D*T #"X 8 H@+0 ,
- M(@N0@2X ( <B!^"! H$ #_%T$ 2 '( <"@ /\70 "3-\,@$Y=3G5.
- M50 O^P &4 !*K0 (9R9![ ((BT "+'!9QH@00P0 9F$B!M A6B"\M
- M PO"&$ _UI03TY=3G5.5?_\2.< ,+_L !E )FT ""1M RV_ 9P:T
- M_ 9A!(; #R3KH %A/< !@ #B<%IR !(S" ,@0 )QG$$AL 0).N@
- M6$]P & ,)*JP!29T@K2O_\(&L 4A 02(!(P"\ +RW__$ZZ !03RM __Q*
- M@&<<+RL 5B\K %(O $ZZ !/[P ,2H!G!E*M__Q@QDJM__QF!' 8'(I2@ 6
- M2BL 46<*+PHO"V%J4$]@7BM*__Q**P!09S(0*P!02(!(P"\ +RW__$ZZ !0
- M3RM __Q*@&<V+P O"V$Z4$]*@&<$< %@*%*M__Q@SB\M__PO"V$B4$]*@&<$
- M< %@$"!M__P0$%*M__Q* &;@< !,WPP 3EU.=4Y5__1(YP$PO^P &4 I
- M;0 , !(@;0 (*4@ &M#\ "@F;0 ((FT "-+\ "@D27X**4@ 'DJ';PZ1R":(
- M6(LDB%B*4X=@[B!M C0_ !;+PAA)%A/2H!G$B!M @@K0 ,(6P $@ H< %@
- M!' 3G%,WPR 3EU.=4Y5_^I(YP\PO^P &4 F;0 (MOP &< Q0O"V$
- M ]983Q(32($K0/_X#$$ *&0 N;E04[[$ )@ +88 FF *A@ "T8 !
- M$F 3Y@ 'R8 "SF +9@ +&8 ".F C9@ *L8 "J& J1@ *@
- M8 "G& IA@ *48 "D& HQ@ $N8 !*F 29@ $B8 !'F 1I@
- M $68 !$F 0Y@ )D8 !2F 49@ %"8 !/F 3I@ $V8 !,F
- M 2Y@ $J(&P $K'L !9G )"< !@ )0(&P $DH09P ",G 8 "0"!L !)*
- M$&8&< !@ (R4JP $F A8@2R!+5H@D2!(2(&P $K(09P9P & A(O"DZZ
- M !83RX #(< !;QHO!R\L !(O"DZZ !/[P ,2H!G!G 8 !YM^L !)@
- M '*(&P $DH09QHB2R)+5HD0$$B 2, O "\)3KH %!/2H!F!G 8 !ME*L
- M !)@ &:(&P $DH09QHB2R)+5HD0$$B 2, O "\)3KH %!/2H!G!G 8 !
- MAE*L !)@ %J$!-(@$C !( 4+ K; 2__ O+?_X80#^*%A/2H!G'" &
- M(@;E@2!L !I*L!@ 9@8AK?_P& !P 6 4!P & 3H0$TB 2, $@ !XJ
- M "ML !+_\"\M__AA /WD6$]*@&<<( 4B!>6!(&P 'DJP& !F!B&M__ 8 ' !
- M8 _' 8 ]B!M__@,$ &9PX@2R!+5H@K2/_X8 RBML !+_]"!+($M6
- MB"\(80#]DEA/2H!G!G !8 P"EM__0 $B\+80 !<%A/)D"V_ 9P8,$P &
- M9\9P & )YX "!M__@,$ (9@08* ##!, "F8$< !@ G !*VP $O_N($L@
- M2U:(+P@K0/_J87183RM __(B+?_RLJW_ZFTR2@1G"B!L !(2$+($9A(O+?_X
- M80#]#EA/2H!G!' !8#Q3K?_R(&W_[M'M__(I2 28,1P & F< %@(DAL 11.
- MN@ 6$]P & 4)FW_^& _.A(; $F3KH %A/< !,WPSP3EU.=4Y5__1(YP$P
- MO^P &4 !^ "9L !(@;0 (5H@D2"!M @0$$B !$ VUN#$ !FQHXT!.
- M^P "8 I@(F ^8%I@6& .+PM.N@ 6$\N -?'8%02$K(39DY2AU*+8/1*$V=$
- M$!-(@$C +P O"DZZ !03TJ 9S!2AU*+8.)*$V<F$!-(@$C +P O"DZZ !0
- M3TJ 9A)2AU*+8.)(; $Z3KH %A/?@ I2P 2( =,WPR 3EU.=4Y5__Q(YP$0
- MO^P &4 F;0 (0>P "+'+9@1P &!$$"L 4B 2, "@ /_A@!(K )(
- M@4C! H$ #_T($N $J'9@1P & :#!, !V8*($L@2Y''( A@"B!+($O1QR (
- M3G%,WPB 3EU.=0 #^ !4 ! /I@ #O@ [B *>@ "EH
- M @R ('@ !]( >Z 'J@ !U@ <& %F !"( ,, "_@
- M MP &J E &H 8 60 ( _4 /L@ #QX [4 .
- MK #H0 Y( .* #?0 W: -L #98 V -6 #5 TH
- M -( #0@ SF ,U #,8 RV ,J #*0 NT +A "VP
- M MD *V "@P F8 )3@ "4H E" ).@ "38 D> )&
- M"0H D& ([ "-( C* (Q@ ")0 B* (0 ""( @: '
- M^ !^X ?* ': !TX <V '+ !R8 ;V &S@ !LH :P
- M &I !HX 9\ &9@ !<X 7& %@ !7P /L #2 O
- M +2 "S C( (H !O@ ;H &D N@ *P "F >@
- M &0 ! . #0 P *@ /OA@ E]?8F%S90
- M#P #\@ \4 +T@ "UH H^ )_@ "9 DN (_@ "+X 6X
- M #S RP &* "H, )?7WAC;W9F \ _, /& "]8
- M M> *0@ "@( F4 ),@ "0( C" %O ] ,P !C@
- M Z# #7W)E9V5R<F]R % #ZH [\ .Y@ "GX I> (
- M-@ !]8 >^ 'K@ !UP <* %G !"8 ,0 # @ N &N
- M F &X <@P E]M86QL;V, 0 (:# "7W-T<FQE;@
- M $ /4@ #/8 %: !1(, )?<W1R8W-P;@ $ @F@P E]S
- M=')C:'( !@ #Y8 ]X -< #4 L$ *I(, )?<W1R;F-M
- M< ( T. *O /R #Z@ %-.54Q,(&%R9W5M96YT ')E
- M9V5X<"!T;V\@8FEG !O=70@;V8@<W!A8V4 '1O;R!M86YY("@I '5N;6%T
- M8VAE9" H*0 =6YM871C:&5D("@I !J=6YK(&]N(&5N9 J*R!O<&5R86YD
- M(&-O=6QD(&)E(&5M<'1Y &YE<W1E9" J/RL &EN=F%L:60@6UT@<F%N9V4
- M '5N;6%T8VAE9"!;70 :6YT97)N86P@=7)P _*RH@9F]L;&]W<R!N;W1H
- M:6YG '1R86EL:6YG(%P %XD+ELH*7P_*RI< &EN=&5R;F%L(&1I<V%S=&5R
- M $Y53$P@<&%R86UE=&5R !C;W)R=7!T960@<')O9W)A;0!M96UO<GD@8V]R
- M<G5P=&EO;@!C;W)R=7!T960@<&]I;G1E<G, &EN=&5R;F%L(&9O=6QU<
- M _( /K "0 _( /[ 3@"B ')E9V5R<F]R+F\ 7V5X:70
- M7V9P<FEN=&8 7U]I;V( 7U]X8V]V9@!?7V)A<V4 7W)E9V5R<F]R %]?3452
- M1T5$ ')E9W-U8BYO %]S=')N8W!Y %]R96=S=6( <F5G97AP+F\ 7W-T<FYC
- M;7 7W-T<F-H<@!?<W1R8W-P;@!?<W1R;&5N %]M86QL;V, 7W)E9V5X96,
- M7W)E9V-O;7 $ " # /I 4 "P 1 !H ( H $ , $ .@ $
- M ^H $, -@ " 10/I 0 2P O " * ! %4 ! #H $@/J
- M != +, P ! L#Z0 ( &4 ;@!V '\ AP O " * " ) *- ! )D !
- 6 #H 4P/J Z D#ZP '\
-
- end
- SHAR_EOF
- cat << \SHAR_EOF > regmagic.h
- /*
- * The first byte of the regexp internal "program" is actually this magic
- * number; the start node begins in the second byte.
- */
- #define MAGIC 0234
- SHAR_EOF
- cat << \SHAR_EOF > regsub.c
- /*
- * regsub @(#)regsub.c 1.3 of 2 April 86
- *
- * Copyright (c) 1986 by University of Toronto. Written by Henry Spencer. Not
- * derived from licensed software.
- *
- * Permission is granted to anyone to use this software for any purpose on any
- * computer system, and to redistribute it freely, subject to the following
- * restrictions:
- *
- * 1. The author is not responsible for the consequences of use of this
- * software, no matter how awful, even if they arise from defects in it.
- *
- * 2. The origin of this software must not be misrepresented, either by explicit
- * claim or by omission.
- *
- * 3. Altered versions must be plainly marked as such, and must not be
- * misrepresented as being the original software.
- */
- #include <stdio.h>
- #ifdef AMIGA
- #include "regexp.h"
- #else
- #include <regexp.h>
- #endif
- #include "regmagic.h"
-
- #ifndef CHARBITS
- #define UCHARAT(p) ((int)*(unsigned char *)(p))
- #else
- #define UCHARAT(p) ((int)*(p)&CHARBITS)
- #endif
-
- /*
- * - regsub - perform substitutions after a regexp match
- */
- void
- regsub(prog, source, dest)
- regexp *prog;
- char *source;
- char *dest;
- {
- register char *src;
- register char *dst;
- register char c;
- register int no;
- register int len;
- extern char *strncpy();
-
- if (prog == NULL || source == NULL || dest == NULL) {
- regerror("NULL parm to regsub");
- return;
- }
- if (UCHARAT(prog->program) != MAGIC) {
- regerror("damaged regexp fed to regsub");
- return;
- }
- src = source;
- dst = dest;
- while ((c = *src++) != '\0') {
- if (c == '&')
- no = 0;
- else if (c == '\\' && '0' <= *src && *src <= '9')
- no = *src++ - '0';
- else
- no = -1;
-
- if (no < 0) { /* Ordinary character. */
- if (c == '\\' && (*src == '\\' || *src == '&'))
- c = *src++;
- *dst++ = c;
- } else if (prog->startp[no] != NULL && prog->endp[no] != NULL) {
- len = prog->endp[no] - prog->startp[no];
- (void) strncpy(dst, prog->startp[no], len);
- dst += len;
- if (len != 0 && *(dst - 1) == '\0') { /* strncpy hit NUL. */
- regerror("damaged match string");
- return;
- }
- }
- }
- *dst++ = '\0';
- }
- SHAR_EOF
- # End of shell archive
- exit 0
- --
- Bob Page, U of Lowell CS Dept. page@swan.ulowell.edu ulowell!page
- Have five nice days.
-